Scratch work
Code
import altair as alt
# Function to download geojson file
response1 = requests.get('https://raw.githubusercontent.com/arcee123/GIS_GEOJSON_CENSUS_TRACTS/master/11.geojson')
# Create background of D.C.
background = alt.Chart(alt.Data(values=response1.json())).mark_geoshape(
fill="lightgray",
stroke='white',
strokeWidth=1
).encode(
).properties(
width=600,
height=600
)
backgroundCode
| StateAbbr | StateDesc | CountyName | CountyFIPS | TractFIPS | TotalPopulation | ACCESS2_CrudePrev | ACCESS2_Crude95CI | ARTHRITIS_CrudePrev | ARTHRITIS_Crude95CI | ... | OBESITY_Crude95CI | PHLTH_CrudePrev | PHLTH_Crude95CI | SLEEP_CrudePrev | SLEEP_Crude95CI | STROKE_CrudePrev | STROKE_Crude95CI | TEETHLOST_CrudePrev | TEETHLOST_Crude95CI | Geolocation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8871 | DC | District of Columbia | District of Columbia | 11001 | 11001007604 | 3644 | 9.0 | ( 7.0, 11.3) | 24.3 | (22.9, 25.7) | ... | (30.2, 34.0) | 10.4 | ( 9.2, 11.7) | 39.0 | (37.0, 40.8) | 4.8 | ( 4.2, 5.5) | 15.5 | ( 9.8, 22.1) | POINT (-76.96379997 38.86610065) |
| 8884 | DC | District of Columbia | District of Columbia | 11001 | 11001005600 | 6756 | 5.5 | ( 4.4, 7.0) | 13.7 | (13.0, 14.4) | ... | (15.6, 18.2) | 5.9 | ( 5.3, 6.7) | 26.2 | (24.3, 28.2) | 2.0 | ( 1.8, 2.2) | 7.6 | ( 5.1, 10.9) | POINT (-77.05385751 38.90021244) |
| 8919 | DC | District of Columbia | District of Columbia | 11001 | 11001008402 | 2149 | 5.2 | ( 4.1, 6.6) | 15.9 | (15.2, 16.8) | ... | (19.8, 22.0) | 5.7 | ( 5.1, 6.4) | 28.8 | (27.5, 30.3) | 2.4 | ( 2.1, 2.8) | 7.2 | ( 4.1, 14.4) | POINT (-76.9910702 38.89861357) |
| 9058 | DC | District of Columbia | District of Columbia | 11001 | 11001001100 | 4779 | 3.3 | ( 3.0, 3.8) | 18.8 | (18.1, 19.6) | ... | (16.0, 17.5) | 5.2 | ( 4.9, 5.7) | 23.6 | (22.8, 24.4) | 2.0 | ( 1.9, 2.2) | 3.4 | ( 2.6, 5.2) | POINT (-77.07767045 38.95722636) |
| 9075 | DC | District of Columbia | District of Columbia | 11001 | 11001008002 | 3031 | 4.7 | ( 3.8, 6.1) | 13.4 | (12.7, 14.2) | ... | (19.4, 21.8) | 4.9 | ( 4.4, 5.6) | 28.9 | (27.6, 30.5) | 1.8 | ( 1.6, 2.0) | 5.6 | ( 3.3, 9.7) | POINT (-76.98275018 38.89154628) |
5 rows × 67 columns
Code
import plotly.express as px
import plotly.graph_objects as go
# Ingest GEOJSON file of census tracts in DC
req_dc = requests.get('https://raw.githubusercontent.com/arcee123/GIS_GEOJSON_CENSUS_TRACTS/master/11.geojson')
json_dc = req_dc.json()
hover_cols = ['ACCESS2_CrudePrev', 'ARTHRITIS_CrudePrev']
px.choropleth(dc_health_df,
geojson=json_dc,
locations='TractFIPS',
color='TotalPopulation',
featureidkey="properties.GEOID",
scope="usa",
center={'lat':38.8938005,'lon':-77.1579293},
hover_data = hover_cols,
fitbounds="locations",
height=700,
title = "DC Population")Code
# IMPORT RELEVANT LIBRARIES
import numpy as np
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
import numpy as np
import json
import requests
import numpy as np
import scipy.stats
import plotly.subplots as sp
from pathlib import PathCode
# Resetting defaults and import plotly libraries
import plotly.io as pio
pio.renderers.default = "browser"
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
import statsmodels.api as sm
import numpy as np
from sklearn.metrics import r2_score
# isolate only columns with CrudePrev in the name
dc_health_df_prev = dc_health_df.filter(regex='CrudePrev')
df = dc_health_df_prev
# Rename columns
df = df.rename(columns={'ACCESS2_CrudePrev': '% of Adults without Health Insurance',
'ARTHRITIS_CrudePrev': '% of Adults with Arthritis',
'BINGE_CrudePrev': '% of Adults who Binge Drink',
'BPHIGH_CrudePrev': '% of Adults with High Blood Pressure',
'BPMED_CrudePrev': '% of Adults with High Blood Pressure who take Blood Pressure Medication',
'CANCER_CrudePrev': '% of Adults who were Diagnosed with Cancer',
'CASTHMA_CrudePrev': '% of Adults who were Diagnosed with Asthma',
'CERVICAL_CrudePrev': '% of Women who had a Pap Smear in the Past 3 Years',
'CHD_CrudePrev': '% of Adults who were Diagnosed with Coronary Heart Disease',
'CHECKUP_CrudePrev': '% of Adults who had a Routine Checkup in the Past Year',
'CHOLSCREEN_CrudePrev': '% of Adults who had Cholesterol Checked in the Past 5 Years',
'COLON_SCREEN_CrudePrev': '% of Adults who had a Colonoscopy or similar test in the Past 10 Years',
'COPD_CrudePrev': '% of Adults who were Diagnosed with COPD (Chronic Obstructive Pulmonary Disease)',
'COREM_CrudePrev': '% Prevalence of Older Adult Men aged >=65 years who are up to date on preventative health',
'COREW_CrudePrev': '% Prevalence of Older Adult Women aged >=65 years who are up to date on preventative health',
'CSMOKING_CrudePrev': '% of Adults who Currently Smoke',
'DENTAL_CrudePrev': '% of Adults who had a Dental Visit in the Past Year',
'DEPRESSION_CrudePrev': '% of Adults who were Diagnosed with Depression',
'DIABETES_CrudePrev': '% of Adults who were Diagnosed with Diabetes',
'GHLTH_CrudePrev': '% of Adults who reported their Health as not Good',
'HIGHCHOL_CrudePrev': '% of Adults who were Diagnosed with High Cholesterol',
'KIDNEY_CrudePrev': '% of Adults who were Diagnosed with Kidney Disease',
'LPA_CrudePrev': '% of Adults who are Physically Inactive',
'MAMMOUSE_CrudePrev': '% Women aged 50-74 years who had a Mammogram in the Past 2 Years',
'MHLTH_CrudePrev': '% of Adults who reported their Mental Health as not Good',
'OBESITY_CrudePrev': '% of Adults who were Obese',
'PHLTH_CrudePrev': '% of Adults who reported their Physical Health as not Good',
'SLEEP_CrudePrev': '% of Adults who reported their Sleep as not Good',
'STROKE_CrudePrev': '% of Adults who were Diagnosed with Stroke',
'TEETHLOST_CrudePrev': '% of Adults who have lost all of their Natural Teeth'})
# list of health metrics for drop down menu
column_names = df.columns
# Creating the initial scatter plot
fig = go.Figure(go.Scatter(x=df[column_names[0]], y=df[column_names[1]], mode='markers'))
# Label axes
fig.update_xaxes(title_text='X Axis')
fig.update_yaxes(title_text='Y Axis')
# Setting the range for x and y axes
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(range=[0, 100])
for col in column_names:
for col2 in column_names:
x = df[col]
y = df[col2]
fig.add_trace(go.Scatter(x=x, y=y, mode='markers', name=col + ' vs ' + col2, showlegend=False, visible=False))
# Update the visibility of the traces
def update_visibility(selected_col, selected_col2):
for i, trace in enumerate(fig.data):
if trace.name == selected_col + ' vs ' + selected_col2:
trace.visible = True
elif trace.name == selected_col + ' vs ' + selected_col2 + ' Best Fit':
trace.visible = True
else:
trace.visible = False
# Create the drop-down menus for x (col) and y (col2) axes of the scatter plot
col_dropdown = [{'label': col, 'value': col} for col in column_names]
col2_dropdown = [{'label': col2, 'value': col2} for col2 in column_names]
# #Define the dropdown menu for x-axis
button_layer_1_height = 1.08
x_axis_dropdown = go.layout.Updatemenu(
buttons=list([dict(args=[{'x': [df[col]]}, update_visibility(col, col2)], label=col, method='update') for col in column_names]),
direction="down",
pad={"r": 10, "t": 10},
showactive=True,
x=0.06,
xanchor="left",
y=button_layer_1_height + 0.05,
yanchor="top"
)
# Define the dropdown menu for y-axis
y_axis_dropdown = go.layout.Updatemenu(
buttons=list([dict(args=[{'y': [df[col2]]}, update_visibility(col, col2)], label=col2, method='update') for col2 in column_names]),
direction="down",
pad={"r": 10, "t": 10},
showactive=True,
x=0.06,
xanchor="left",
y=button_layer_1_height,
yanchor="top"
)
# Update the layout to include the dropdown menus
fig.update_layout(
updatemenus=[x_axis_dropdown, y_axis_dropdown]
)
# Label axes
fig.update_xaxes(title_text='X Axis')
fig.update_yaxes(title_text='Y Axis')
# Setting the range for x and y axes
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(range=[0, 100])
# Update plot sizing
fig.update_layout(
width=900,
height=900,
autosize=False,
#margin=dict(t=100, b=0, l=0, r=0),
)
# add annotations
fig.update_layout(
annotations=[
dict(
text="X Axis:",
x=0,
xref="paper",
y=button_layer_1_height + 0.025,
yref="paper",
align="left",
showarrow=False
),
dict(
text="Y Axis:",
x=0,
xref="paper",
y=button_layer_1_height - 0.025,
yref="paper",
align="left",
showarrow=False
)
]
)
# Change background color to grey
fig.update_layout(
plot_bgcolor='rgb(230, 230, 230)'
)
# Change scatter point color to red
fig.update_traces(
marker=dict(color='red')
)
# Change font to Calibri
fig.update_layout(
font=dict(family='Proxima Nova')
)
# # # Create a function to update the visibility of the traces based on selected columns
# def update_visibility(selected_col, selected_col2):
# for i, trace in enumerate(fig.data):
# trace.visible = (trace.name == selected_col + ' vs ' + selected_col2)
# trace.visible = (trace.name == selected_col + ' vs ' + selected_col2 + ' Best Fit')
# Display the scatter plot with dropdown menus
fig.show()Code
import numpy as np
x = df['% of Adults who Binge Drink']
y = df['% of Adults who reported their Physical Health as not Good']
# Find the coefficients of the linear regression line (i.e., the line of best fit)
coefficients = np.polyfit(x, y, 1)
# Create the line of best fit using the coefficients
line = np.poly1d(coefficients)
# Calculate the R-squared value
r2 = np.corrcoef(x, y)[0, 1]**2
print(line)
print(r2)
-0.6238 x + 20.94
0.7112826515828994
| StateAbbr | StateDesc | CountyName | CountyFIPS | census_tract | TotalPopulation | ACCESS2_CrudePrev | ACCESS2_Crude95CI | ARTHRITIS_CrudePrev | ARTHRITIS_Crude95CI | ... | no_health_insurance_perc | male_perc | female_perc | income_inequality_gini_index | hs_grad_perc | households_no_vehicle_perc | households_w_internet_perc | walkability_score | GEOID | neighborhood_name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | DC | District of Columbia | District of Columbia | 11001 | 11001007604 | 3644 | 9.0 | ( 7.0, 11.3) | 24.3 | (22.9, 25.7) | ... | 3.4 | 44.1 | 55.9 | 0.5617 | 88.7 | 33.7 | 76.6 | 11.666667 | 1.100101e+10 | NAYLOR/HILLCREST |
| 1 | DC | District of Columbia | District of Columbia | 11001 | 11001005600 | 6756 | 5.5 | ( 4.4, 7.0) | 13.7 | (13.0, 14.4) | ... | 2.2 | 50.6 | 49.4 | 0.5699 | 99.0 | 60.6 | 91.4 | 15.916667 | 1.100101e+10 | GWU |
| 2 | DC | District of Columbia | District of Columbia | 11001 | 11001008402 | 2149 | 5.2 | ( 4.1, 6.6) | 15.9 | (15.2, 16.8) | ... | 3.6 | 43.2 | 56.8 | 0.4920 | 96.6 | 38.6 | 86.9 | 15.500000 | 1.100101e+10 | UNION STATION |
| 3 | DC | District of Columbia | District of Columbia | 11001 | 11001001100 | 4779 | 3.3 | ( 3.0, 3.8) | 18.8 | (18.1, 19.6) | ... | 2.3 | 44.0 | 56.0 | 0.5019 | 99.3 | 24.1 | 88.2 | 17.750000 | 1.100100e+10 | TENLEYTOWN |
| 4 | DC | District of Columbia | District of Columbia | 11001 | 11001008002 | 3031 | 4.7 | ( 3.8, 6.1) | 13.4 | (12.7, 14.2) | ... | 0.3 | 46.8 | 53.2 | 0.4067 | 95.9 | 18.7 | 85.2 | 15.583333 | 1.100101e+10 | KINGMAN PARK |
5 rows × 84 columns
Code
# Resetting defaults and import plotly libraries
import plotly.io as pio
pio.renderers.default = "browser"
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
# isolate only columns with CrudePrev in the name
dc_health_df_prev = df_merged.filter(regex='CrudePrev')
# add the walkability_score column back in
dc_health_df_prev['walkability_score'] = df_merged['walkability_score']
df = dc_health_df_prev
# Rename columns
df = df.rename(columns={'ACCESS2_CrudePrev': '% of Adults without Health Insurance',
'ARTHRITIS_CrudePrev': '% of Adults with Arthritis',
'BINGE_CrudePrev': '% of Adults who Binge Drink',
'BPHIGH_CrudePrev': '% of Adults with High Blood Pressure',
'BPMED_CrudePrev': '% of Adults with High Blood Pressure who take Blood Pressure Medication',
'CANCER_CrudePrev': '% of Adults who were Diagnosed with Cancer',
'CASTHMA_CrudePrev': '% of Adults who were Diagnosed with Asthma',
'CERVICAL_CrudePrev': '% of Women who had a Pap Smear in the Past 3 Years',
'CHD_CrudePrev': '% of Adults who were Diagnosed with Coronary Heart Disease',
'CHECKUP_CrudePrev': '% of Adults who had a Routine Checkup in the Past Year',
'CHOLSCREEN_CrudePrev': '% of Adults who had Cholesterol Checked in the Past 5 Years',
'COLON_SCREEN_CrudePrev': '% of Adults who had a Colonoscopy or similar test in the Past 10 Years',
'COPD_CrudePrev': '% of Adults who were Diagnosed with COPD (Chronic Obstructive Pulmonary Disease)',
'COREM_CrudePrev': '% Prevalence of Older Adult Men aged >=65 years who are up to date on preventative health',
'COREW_CrudePrev': '% Prevalence of Older Adult Women aged >=65 years who are up to date on preventative health',
'CSMOKING_CrudePrev': '% of Adults who Currently Smoke',
'DENTAL_CrudePrev': '% of Adults who had a Dental Visit in the Past Year',
'DEPRESSION_CrudePrev': '% of Adults who were Diagnosed with Depression',
'DIABETES_CrudePrev': '% of Adults who were Diagnosed with Diabetes',
'GHLTH_CrudePrev': '% of Adults who reported their Health as not Good',
'HIGHCHOL_CrudePrev': '% of Adults who were Diagnosed with High Cholesterol',
'KIDNEY_CrudePrev': '% of Adults who were Diagnosed with Kidney Disease',
'LPA_CrudePrev': '% of Adults who are Physically Inactive',
'MAMMOUSE_CrudePrev': '% Women aged 50-74 years who had a Mammogram in the Past 2 Years',
'MHLTH_CrudePrev': '% of Adults who reported their Mental Health as not Good',
'OBESITY_CrudePrev': '% of Adults who were Obese',
'PHLTH_CrudePrev': '% of Adults who reported their Physical Health as not Good',
'SLEEP_CrudePrev': '% of Adults who reported their Sleep as not Good',
'STROKE_CrudePrev': '% of Adults who were Diagnosed with Stroke',
'TEETHLOST_CrudePrev': '% of Adults who have lost all of their Natural Teeth'})
# list of health metrics for drop down menu
column_names = df.columns
# Creating the initial scatter plot
fig = go.Figure(go.Scatter(x=df[column_names[0]], y=df[column_names[1]], mode='markers'))
# Label axes
fig.update_xaxes(title_text='Walkability Score')
fig.update_yaxes(title_text='Y Axis')
# Setting the range for x and y axes
#fig.update_xaxes(range=[0, 100])
fig.update_xaxes(range=[0, max(df['walkability_score'])])
fig.update_yaxes(range=[0, 100])
for col in column_names:
fig.add_trace(go.Scatter(x=df['walkability_score'], y=df[col], mode='markers', name='Walkability vs ' + col, visible=False))
def update_visibility(selected_col, selected_col2):
return [(trace.name == selected_col + ' vs ' + selected_col2) for trace in fig.data]
# Create the drop-down menus for x (col) and y (col2) axes of the scatter plot
col_dropdown = [{'label': col, 'value': col} for col in column_names]
# Define the dropdown menu for x-axis
button_layer_1_height = 1.08
y_axis_dropdown = go.layout.Updatemenu(
buttons=list([
dict(
args=[
{"y": [df[col]], "visible": [(trace.name == "Walkability vs " + col) for trace in fig.data]}
],
label=col,
method="update",
) for col in column_names
]),
direction="down",
pad={"r": 10, "t": 10},
showactive=True,
x=0.06,
xanchor="left",
y=button_layer_1_height,
yanchor="top"
)
# Update the layout to include the dropdown menus
fig.update_layout(
updatemenus=[y_axis_dropdown]
)
# Label axes
fig.update_xaxes(title_text='X Axis')
fig.update_yaxes(title_text='Y Axis')
# Update plot sizing
fig.update_layout(
width=900,
height=900,
autosize=False,
#margin=dict(t=100, b=0, l=0, r=0),
)
# add annotations
fig.update_layout(
annotations=[
dict(
text="X Axis: Walkability Score of the Neighborhood",
x=0,
xref="paper",
y=button_layer_1_height + 0.025,
yref="paper",
align="left",
showarrow=False
),
dict(
text="Y Axis:",
x=0,
xref="paper",
y=button_layer_1_height - 0.025,
yref="paper",
align="left",
showarrow=False
)
]
)
def update_visibility(selected_col, selected_col2):
return [(trace.name == selected_col + ' vs ' + selected_col2) for trace in fig.data]
# Change background color to grey
fig.update_layout(
plot_bgcolor='rgb(230, 230, 230)'
)
# Change scatter point color to red
fig.update_traces(
marker=dict(color='red')
)
# Change font to Calibri
fig.update_layout(
font=dict(family='Proxima Nova')
)
# Display the scatter plot with dropdown menus
fig.show()/var/folders/xw/lth4lxb57psc5_ks7_nq1zj40000gn/T/ipykernel_41309/2832497246.py:12: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy